1 Distribution of prey type eaten for each predator

pred_diet_dist <- data.frame(pred_species=c("Herring", "Sprat", "Cod", "Haddock", "Whiting", "Blue Whiting", "Norway Pout", "Poor Cod", "European Hake", "Monkfish", "Horse Mackerel", "Mackerel", "Common Dab", "Plaice", "Megrim", "Sole", "Boarfish"))


prey_type <- c("benthos", "fish", "nekton", "other", "zooplankton")

pred_diet_dist[ , prey_type] <- NA

#use below for loop to fill in this data frame
#then, use facet_wrap() to plot multiple pie charts from it
#species_list includes Boarfish which doesn't have values, so add 'length()-1' to account for this - i.e. doesn't analyse any Boarfish data because it's nonexistent

prey_type_list <- list("benthos", "fish", "nekton", "other", "zooplankton")

for (i in 1:(length(species_list)-1)){
  b=f=n=o=z=0
  first_species <- renamed_df %>% filter(renamed_df$pred_species == fixed(species_list[i]))
  
  benthos <- first_species[first_species$prey_type == fixed("benthos"),]
  b <- length(benthos$haul_id)
    
  fish <- first_species[first_species$prey_type == fixed("fish"),]    
  f <- length(fish$haul_id)
    
  nekton <- first_species[first_species$prey_type == fixed("nekton"),]
  n <- length(nekton$haul_id)
    
  other <- first_species[first_species$prey_type == fixed("other"),]
  o <- length(other$haul_id)
    
  zoo <- first_species[first_species$prey_type == fixed("zooplankton"),]
  z <- length(zoo$haul_id)
  
  pie(c(b,f,n,o,z), prey_type_list, main = species_list[i])
}

#ggplot(renamed_df, aes(x="", y="abc", fill=prey_type)) +
#  geom_bar(stat = "identity", width=1, position=position_fill()) +
#  coord_polar(theta="y") +
#  facet_wrap (.~renamed_df$pred_species, scale="free_y")

These are individual pie charts showing the distribution of the type of prey each predator eats.

2 Ave PPMR for individual species, weighted by prey biomass:

#Separated into individual plots for each predator species -> using facet_wrap for the variable (pred_species)
ggplot(data = renamed_df, aes(x=log(ppmr)), group=1) + 
  labs(title = "log(PPMR) v. biomass density of prey", x="log(PPMR)", y="Biomass density of prey") +
  facet_wrap(~renamed_df$pred_species, scale="free_y") + 
  theme(strip.text = element_text(size = 5)) +
  geom_density(aes(weight = prey_weight_g), colour="red")

#poor cod and boarfish?

for (i in 1:length(species_list)){
  name <- species_list[i]
  first_species <- renamed_df %>% 
    filter(pred_species == fixed(name))
  ggplot(data = first_species, aes(x=log(ppmr)), group=1) + 
          labs(title = name, x="log(PPMR)", y="biomass density of prey") +
          geom_density(aes(weight = prey_weight_g), colour="red") + 
          theme(plot.title = element_text(size=15))
  av <- weighted.mean(first_species$ppmr, w = first_species$prey_weight_g, na.rm = TRUE)
  stan_dev <- sqrt(wtd.var(first_species$ppmr, w = first_species$prey_weight_g, na.rm = TRUE))
  #make standard deviation weighted by prey biomass
  print(paste(name, av, stan_dev))
}
## [1] "Herring 19709.823009688 241475.080343766"
## [1] "Sprat 195.609172891389 4253.36606797894"
## [1] "Cod 497.290047167729 6772.41910772189"
## [1] "Haddock 3022.97716139834 21797.4455834206"
## [1] "Whiting 1264.39654448219 24534.9527990708"
## [1] "Blue Whiting 2010.93278075909 17154.1672149982"
## [1] "Norway Pout 10973.8360655567 26164.1899818578"
## [1] "Poor Cod 517.625942013013 2734.01380637647"
## [1] "European Hake 76.7901806692788 310.238947982841"
## [1] "Monkfish 62.9178507723842 100.903624078094"
## [1] "Horse Mackerel 30796.6346791468 308842.203990889"
## [1] "Mackerel 205801.946353935 817171.776698264"
## [1] "Common Dab 128.357530231156 781.069519499324"
## [1] "Plaice 587.735382021861 2347.38746472048"
## [1] "Megrim 64.4484436302096 165.770999136965"
## [1] "Sole 274.421031981985 2453.27892002065"
## [1] "Boarfish NaN NA"

Looking for the most common PPMR for each individual species.

A graph of the weighted ppmr for each species against the biomass density of the prey. Prints the mean ppmr, as weighted by prey biomass.

3 Ave PPMR for individual species, weighted by number of prey:

#Separated into individual plots for each predator species -> using facet_wrap for the variable (pred_species)
ggplot(data = renamed_df, aes(x=log(ppmr)), group=1) + 
  labs(title = "Scatter plot: log(PPMR) v. number density of prey", x="log(PPMR)", y="Number density of prey") +
  facet_wrap(~renamed_df$pred_species, scale="free_y") + 
  theme(strip.text = element_text(size = 5)) +
  geom_density(aes(weight = no._prey_per_stmch), colour="red")

for (i in 1:length(species_list)){
  name <- species_list[i]
  first_species <- renamed_df %>% 
    filter(pred_species == fixed(name))
  ggplot(data = first_species, aes(x=log(ppmr)), group=1) + 
          labs(title = name, x="log(PPMR)", y="Number density of prey") +
          geom_density(aes(weight = no._prey_per_stmch), colour="red") + 
          theme(plot.title = element_text(size=15))
  av <- weighted.mean(first_species$ppmr, w = first_species$no._prey_per_stmch, na.rm = TRUE)
  stan_dev <- sqrt(wtd.var(first_species$ppmr, w = first_species$no._prey_per_stmch, na.rm = TRUE))
  print(paste(name, av, stan_dev))
}
## [1] "Herring 4920445.2411513 7365532.72173494"
## [1] "Sprat 22863.8134490306 63519.8543048881"
## [1] "Cod 37331.0819181898 3862607.45841392"
## [1] "Haddock 143561.249495721 256951.107690553"
## [1] "Whiting 231137.0252423 452944.952135886"
## [1] "Blue Whiting 87884.0802338307 193515.083373986"
## [1] "Norway Pout 41782.5762445494 55159.3033736074"
## [1] "Poor Cod 17279.5676618823 20349.7295229756"
## [1] "European Hake 296.484038403308 2155.3411177515"
## [1] "Monkfish 214.778052908268 1238.10824158785"
## [1] "Horse Mackerel 1656066.03448337 5990392.43827448"
## [1] "Mackerel 7498547.2651935 14983797.9019753"
## [1] "Common Dab 4019.29968530871 23239.7703715824"
## [1] "Plaice 27238.3526014258 136213.922484656"
## [1] "Megrim 634.567952744918 2245.62067618099"
## [1] "Sole 32524.9032981856 300306.499870647"
## [1] "Boarfish NaN NA"

Looking for the most common PPMR for each individual species.

A graph of the weighted ppmr for each species against the number density of the prey. Prints the mean ppmr, as weighted by number of prey.

4 Specific PPMR calculations by different weightings for Herring species

first_species <- renamed_df %>% filter(pred_species == fixed("Herring"))
#separate data set of a single species

renamed_df = renamed_df %>% mutate(lppmr = log(ppmr))
#adding a column of log(ppmr) values

herringDF <- renamed_df %>% 
    filter(pred_species == fixed("Herring"))

herringmean = mean(herringDF$lppmr)
herringSD = sqrt(var(herringDF$lppmr))
#these are non-weighted means
#var is variance, i.e. ave. distance from each point to the mean
#mean is the arithmetic mean of log(ppmr)

bio_herringmean = weighted.mean(herringDF$lppmr, w = herringDF$prey_weight_g, na.rm = TRUE)
bio_herringSD = sqrt(wtd.var(herringDF$lppmr, w = herringDF$prey_weight_g, na.rm = TRUE))
#mean and standard deviation, weighted by the prey biomass


ggplot(data = first_species, aes(x=log(ppmr)), group=1) + 
          labs(title = "log(ppmr) against biomass density for Herring, weighted by prey biomass 
               (with a normal distribution, weighted by prey biomass)", x="log(PPMR)",y="Biomass density of observations") +
          geom_density(aes(weight = prey_weight_g), colour="red") + 
          theme(plot.title = element_text(size=15)) + 
          stat_function(fun = dnorm, args= with(herringDF, c(mean = bio_herringmean, sd = bio_herringSD)))

no_herringmean = weighted.mean(herringDF$lppmr, w = herringDF$no._prey_per_stmch, na.rm = TRUE)
no_herringSD = sqrt(wtd.var(herringDF$lppmr, w = herringDF$no._prey_per_stmch, na.rm = TRUE))
#weighted standard deviation found using the weighted variance
#mean and standard deviation, weighted by the number of prey

ggplot(data = first_species, aes(x=log(ppmr)), group=1) + 
          labs(title = "log(ppmr) against number density for Herring, weighted by no. of prey 
               (with a normal distribution, weighted by prey biomass)", x="log(PPMR)", y="No. density of observations") +
          geom_density(aes(weight = no._prey_per_stmch), colour="red") + 
          theme(plot.title = element_text(size=15)) +
          stat_function(fun = dnorm, args= with(herringDF, c(mean = no_herringmean, sd = no_herringSD)))

#combined graph of weighted by biomass and by number density
ggplot(data = first_species, aes(x=log(ppmr)), group=1) + 
          labs(title = "log(ppmr) against biomass/number density for Herring, 
               weighted by no. of prey", x="log(PPMR)", y="Number/biomass density of observations") +
          geom_density(aes(weight = no._prey_per_stmch), colour="red") + 
          geom_density(aes(weight = prey_weight_g), colour="blue") + 
          theme(plot.title = element_text(size=15))

#How to add key for the lines??

Plotting the distribution of Herring log(PPMR) as weighted by prey biomass and number of prey (with weighted normal distribution curves plotted over the top of each).

Prey biomass weighted mean: 6.629177 No. of prey weighted mean: 13.54102

The mean is shifted by 6.911843.

What does this mean?

5 Prey weight v. number density of prey

ggplot(data = renamed_df, aes(log(indiv_prey_weight), no._prey_per_stmch)) + 
  labs(title = "log(prey weight) v. number of prey per predator stomach", x="log(prey weight)", y="No. of prey per predator stomach") + 
  geom_point(size=0.5)

#Some interesting results --> introduce colours to show ships  

renamed_df$'haul_id_short' <- gsub("\\-.*", "", renamed_df$'haul_id')
renamed_df$'haul_id_short' <- gsub("_", "", renamed_df$'haul_id_short')
#rename haul_id values -> separate by ship names (e.g. CLYDE) rather than complete id (e.g. CLYDE-1935-6)

haul_list <- unique(renamed_df$haul_id_short)

interesting_haul <- filter(renamed_df, haul_id_short=='CLYDE'|haul_id_short=='END04'|haul_id_short=='CORY08'|haul_id_short=='LUC'|haul_id_short=='HIDDINK'|haul_id_short=='EXCmacDATSTO815'|haul_id_short=="Excmacdatsto815error")

haul_list_interesting <- unique(interesting_haul$haul_id_short)

#Vertical lines: CLYDE, BLEGVARD, CIROL05, END03, LAST, LUC (*), DUBUIT, HARDY, JOHNSON

#Curves: END04

#Horizontal lines: CORY08, HIDDINK

#Identical: EXCmacDATSTO815, Excmacdatsto815error

ggplot (data = interesting_haul, aes(x=log(indiv_prey_weight), y=no._prey_per_stmch)) + 
  labs(title = "log(prey weight) v. number of prey per stomach - separated by ship names", x="log(prey weight)", y="No. prey per stomach") + 
  geom_point(size=.1, colour="red") +
  theme(strip.text = element_text(size = 5)) + facet_wrap(~interesting_haul$haul_id_short, scale="free_y")

#create new facet_wrap() over: vertical lines, curves, horizontal lines, identical

Playing around with data to see any specific correlations; what is the distribution of the weight of prey recorded.

  1. Prey weight v. no. prey per stomach
  2. Log (prey weight) v. no. prey per stomach -> showed some interesting results, so added colours to identify individual ships
  3. Graph 3., but with points from each ship plotted on separate graphs -> note y prop. to e^-x relation for END04; lots of observations for single weights for LUC; lots of the same no. of fish observations for CORY08 and HIDDINK.

Also, I found that the EXCmacDATSTO815 and Excmacdatsto815error gave exactly the same data (which might need to be considered in later calculations).

6 prey weight v. pred weight

ggplot (data = renamed_df, aes(indiv_prey_weight, pred_weight_g)) +
  geom_point(size=0.5) + 
  labs(title = "Predator v. prey mass plot", x="Prey mass (g)", y="Predator mass (g)")

#mass since measured in g

ggplot(data = renamed_df, aes(log(indiv_prey_weight), log(pred_weight_g))) + 
  geom_point(size=0.5) + 
  labs(title = "log(Predator mass) v. log(prey mass) plot", x="log(Prey mass)", y="log(Predator mass)") + 
  stat_smooth (method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

#slope <- coef(lm(log(renamed_df$pred_weight_g)~log(renamed_df$indiv_prey_weight)))
#print(paste("slope of the log(pred) v. log(prey) line of best fit:", slope))
#second part is intercept -> how to separate?

ggplot(data = renamed_df, aes(log(indiv_prey_weight), log(pred_weight_g))) + 
  labs(title = "log(pred. mass) v. log(prey mass) separated by predator species", x="log(prey mass)", y="log(pred. mass)") + 
  geom_point(size=0.2, colour="red") + 
  facet_wrap(~pred_species, scale="free_y") + 
  theme(strip.text = element_text(size = 10))

  1. Prey weight v. predator weight -> attempting to find a link between the predator mass and the prey mass
  2. log(prey weight) v. log(pred. weight) -> using log() to see proportionality of the axes, slope of added line should = PPMR
  3. Looking to find correlation between the masses for each individual species; the slope should intersect the y-axis at 0, else our idea for PPMR calculation (pred mass is prop. to prey mass) is incorrect

7 pred weight v. ppmr

ggplot(data=renamed_df, aes(log(pred_weight_g), log(ppmr))) + 
  geom_point(size=0.5) + 
  labs(title = "log(pred mass) v. log(ppmr) plot", x="log(Pred mass)", y="log(PPMR)") + 
  stat_smooth (method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'

slope2 <- coef(lm(log(renamed_df$ppmr)~log(renamed_df$pred_weight_g)))
print(paste("slope of the log(ppmr) v. log(pred_weight) line of best fit:", slope2))
## [1] "slope of the log(ppmr) v. log(pred_weight) line of best fit: 5.18652611475397" 
## [2] "slope of the log(ppmr) v. log(pred_weight) line of best fit: 0.313443733574052"
#slope of the above plot

ggplot(data=renamed_df, aes(log(pred_weight_g), log(ppmr))) + 
  geom_point(size=0.5) +
  labs(title = "log(pred mass) v. log(ppmr) plot", x="log(Pred mass)", y="log(PPMR)") + stat_smooth (method='lm', se=FALSE) + 
  facet_wrap(~pred_species, scale="free_y") + 
  stat_smooth(method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

species_df <- renamed_df %>% filter(pred_species == fixed("Poor Cod"))

ggplot(data=species_df, aes(log(pred_weight_g), log(ppmr))) + 
  geom_point(size=0.5) +
  labs(title = "log(pred mass) v. log(ppmr) plot: Poor Cod", x="log(Pred mass)", y="log(PPMR)") + stat_smooth (method='lm', se=FALSE) + 
  facet_wrap(~pred_species, scale="free_y") + 
  stat_smooth(method='lm', se=FALSE)
## `geom_smooth()` using formula 'y ~ x'
## `geom_smooth()` using formula 'y ~ x'

species_slope <- coef(lm(log(species_df$ppmr)~log(species_df$pred_weight_g)))
print(paste("slope of the log(ppmr) v. log(pred_weight) line of best fit:", species_slope))
## [1] "slope of the log(ppmr) v. log(pred_weight) line of best fit: 6.20634582898381"   
## [2] "slope of the log(ppmr) v. log(pred_weight) line of best fit: -0.0895529388001952"
#for (i in 1:length(species_list)){
# name <- species_list[i]
 #first_species <- renamed_df %>% filter(pred_species == fixed(name))
 #grad <- coef(lm(log(first_species$ppmr)~log(first_species$pred_weight_g)))
 #print(paste(name, grad)) 
#}

log(pred mass) v. log(ppmr) -> is pred. mass related to ppmr?

We want them to not be proportional (i.e. slope = 0).

CHECK: IS THIS NO. OF POINTS RECORDED OR NO. POINTS*NO. PREY PER STOMACH

8 Introducing a mixed effects model

lmer(log(ppmr) ~ pred_species + prey_weight_g + (1|haul_id_short), data = renamed_df)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(ppmr) ~ pred_species + prey_weight_g + (1 | haul_id_short)
##    Data: renamed_df
## REML criterion at convergence: 1118461
## Random effects:
##  Groups        Name        Std.Dev.
##  haul_id_short (Intercept) 1.663   
##  Residual                  1.956   
## Number of obs: 267431, groups:  haul_id_short, 110
## Fixed Effects:
##                (Intercept)             pred_speciesCod  
##                   5.108989                    1.413988  
##     pred_speciesCommon Dab   pred_speciesEuropean Hake  
##                   0.981545                   -0.577597  
##        pred_speciesHaddock         pred_speciesHerring  
##                   1.916203                    1.617905  
## pred_speciesHorse Mackerel        pred_speciesMackerel  
##                   1.930489                    1.970072  
##         pred_speciesMegrim        pred_speciesMonkfish  
##                  -0.120421                   -0.278221  
##    pred_speciesNorway Pout          pred_speciesPlaice  
##                   2.074157                    2.128633  
##       pred_speciesPoor Cod            pred_speciesSole  
##                   0.648481                    1.741491  
##          pred_speciesSprat         pred_speciesWhiting  
##                   0.419409                    0.811182  
##              prey_weight_g  
##                  -0.003506
lmer(log(ppmr) ~ pred_species + prey_weight_g + (1|haul_id_short) + (1|year) + (1|no._prey_per_stmch) + (1|ices_rectangle), data = renamed_df)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(ppmr) ~ pred_species + prey_weight_g + (1 | haul_id_short) +  
##     (1 | year) + (1 | no._prey_per_stmch) + (1 | ices_rectangle)
##    Data: renamed_df
## REML criterion at convergence: 1023959
## Random effects:
##  Groups             Name        Std.Dev.
##  no._prey_per_stmch (Intercept) 1.7994  
##  ices_rectangle     (Intercept) 0.6634  
##  year               (Intercept) 0.6564  
##  haul_id_short      (Intercept) 1.6903  
##  Residual                       1.7773  
## Number of obs: 254595, groups:  
## no._prey_per_stmch, 4507; ices_rectangle, 718; year, 97; haul_id_short, 97
## Fixed Effects:
##                (Intercept)             pred_speciesCod  
##                   7.354137                    1.336221  
##     pred_speciesCommon Dab   pred_speciesEuropean Hake  
##                   0.500668                   -0.098397  
##        pred_speciesHaddock         pred_speciesHerring  
##                   1.582278                    1.008104  
## pred_speciesHorse Mackerel        pred_speciesMackerel  
##                   1.393272                    1.341490  
##         pred_speciesMegrim        pred_speciesMonkfish  
##                   0.432421                   -0.132208  
##    pred_speciesNorway Pout          pred_speciesPlaice  
##                   1.510116                    1.853671  
##       pred_speciesPoor Cod            pred_speciesSole  
##                   0.512387                    1.706721  
##          pred_speciesSprat         pred_speciesWhiting  
##                   0.030321                    0.870806  
##              prey_weight_g  
##                  -0.004578  
## optimizer (nloptwrap) convergence code: 0 (OK) ; 0 optimizer warnings; 1 lme4 warnings
#lmer(fixed ~ (1|random) + linear)

Fixed effect: log(ppmr)

Random effects: haul_id_short, year

Linear fixed effects: pred_species, prey_weight_g